#!/bin/bash
# Function: prepare raw data by doing what is specified in the echo commands below
# Usage: go to the project analysis directory, then run
#        bash scripts/prepare_raw_data_v1.0.sh <path/to/raw-data-tar.gz in project storage directory>
# Notes:
#   - Run with bash, not sh: the sample-name step further down uses arrays.
#   - Expects a raw_data/ directory in the current directory and an md5.txt
#     file next to the archive.

if [ -z "${1:-}" ]; then
  echo "Usage: bash $0 <path/to/raw-data-tar.gz>" >&2
  exit 2
fi

# Anchor a relative archive path to the invocation directory BEFORE changing
# into raw_data, otherwise it would be resolved against raw_data. If the path
# does not exist from here it is left untouched, so a path written relative to
# raw_data keeps working as it did before.
case $1 in
  /*) archive_path=$1 ;;
  *)
    if [ -e "$1" ]; then
      archive_path=$PWD/$1
    else
      archive_path=$1
    fi
    ;;
esac
archive_dir=$(dirname -- "$archive_path")
archive_name=$(basename -- "$archive_path")

echo "Copying raw data files…"
# Stop here if raw_data is missing: every later step assumes we are inside it.
cd raw_data || { echo "Error: cannot enter raw_data directory" >&2; exit 1; }
cp -- "$archive_path" . || { echo "Error: failed to copy $archive_path" >&2; exit 1; }

echo "Checking data integrity by MD5…"
cp -- "$archive_dir/md5.txt" . || { echo "Error: failed to copy $archive_dir/md5.txt" >&2; exit 1; }
# Write the result to a file first so md5sum's own exit status is available
# (a pipe into tee would report tee's status instead), then show it.
md5sum -c md5.txt > md5_result.txt   # print OK if data is intact
md5_status=$?
cat md5_result.txt
if [ "$md5_status" -ne 0 ]; then
  echo "Error: MD5 verification failed; see raw_data/md5_result.txt" >&2
  exit 1
fi

echo "Decompressing…"
# The ./ prefix keeps tar from misreading names that start with '-' or contain ':'.
tar -xzvf "./$archive_name" || { echo "Error: failed to extract $archive_name" >&2; exit 1; }

echo "Deleting original file to save space..."
# Only reached after a successful extraction; removes the local copy only.
rm -- "$archive_name"
    
echo "Renaming…"
# Rename each FASTQ file in the current directory to <field2>_<field7>.fastq.gz,
# where fields are the '_'-separated parts of the original file name.
for file in *.fastq.gz; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$file" ] || continue

  # awk prints nothing for names with fewer than 7 fields (for example files
  # that were already renamed), so they are not collapsed into "<x>_.fastq.gz".
  newfile=$(printf '%s\n' "$file" | awk -F_ -v OFS=_ 'NF >= 7 { print $2, $7 ".fastq.gz" }')
  if [ -z "$newfile" ]; then
    echo "Skipping $file: expected at least 7 '_'-separated fields" >&2
    continue
  fi

  # Never overwrite an existing file when two names map to the same target.
  if [ -e "$newfile" ]; then
    echo "Skipping $file: target $newfile already exists" >&2
    continue
  fi

  echo "$file -> $newfile"
  mv -- "$file" "$newfile"
done
    
echo "Extracting sample names…"
# Collect the sample name of every *_R1.fastq.gz file in the current directory
# (the file name with the "_R1.fastq.gz" suffix removed). Globbing is used
# instead of parsing ls so unusual file names stay intact.
samplenames=()
for r1_file in *_R1.fastq.gz; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$r1_file" ]] || continue
  samplenames+=("${r1_file%_R1.fastq.gz}")
done

# Space-separated list, shown on screen and saved to samplenames.txt.
printf 'Samples: '
printf '%s\n' "${samplenames[*]}" | tee samplenames.txt

# 1-based "index: name" lookup table. The file is rewritten rather than
# appended to, so a re-run does not accumulate duplicate entries.
for i in "${!samplenames[@]}"; do
  echo "$((i + 1)): ${samplenames[$i]}"
done > samplenames_lookup.txt

samplenumber=${#samplenames[@]}
echo "Sample number: $samplenumber"
    
echo "Changing file permission to read-only and directory permission to read/execute-only…"
# Refuse to touch permissions unless we really are inside raw_data; otherwise a
# failed "cd raw_data" earlier would make this lock down the wrong directory.
if [ "${PWD##*/}" != "raw_data" ]; then
  echo "Error: expected to be inside raw_data, but current directory is $PWD" >&2
  exit 1
fi

for entry in *; do
  # An unmatched glob (empty directory) is left as the literal '*'; skip it.
  [ -e "$entry" ] || continue
  if [ -d "$entry" ]; then
    # Directories need the execute bit to stay traversable.
    chmod 550 "./$entry"
  else
    chmod 440 "./$entry"
  fi
done

cd .. || { echo "Error: cannot leave raw_data directory" >&2; exit 1; }
chmod 550 raw_data

echo "Done"

